These are the three datasets used in this analysis report.
OurAirports
OurAirports is a free site where visitors can explore the world’s airports, read other people’s comments, and leave their own. The site is intended for both passengers and pilots, and users can look up any airport in the world. It was started in 2007 to create a good source of global aviation data that is available to anyone.
OpenFlights
OpenFlights is a tool that lets users map their flights around the world, search and filter them in all sorts of interesting ways, calculate statistics automatically, and share their flights and trips with friends and the entire world. OpenFlights is also the name of the open-source project that builds the tool.
library(data.table) # fast data import
library(tidyverse) # data manipulation
library(plotly) # interactive visualizations
library(janitor) # data manipulation
library(stringr) # character class data manipulation
library(treemap) # tree map visualization
library(igraph)
library(gridExtra)
library(ggraph)
# Print the current working directory (the recorded console output follows).
getwd()
## [1] "C:/Users/USER/Desktop/Airline_Analysis"
# Switch into the input folder so the CSV files can be read by bare file name.
# NOTE(review): this absolute Windows path is machine-specific; the script
# will fail on any other machine unless the path is adjusted.
setwd("C:\\Users\\USER\\Desktop\\Airline_Analysis\\input")
# Load the airport table. The file is read without a header row, so the
# fourteen column names are assigned by hand afterwards.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
airport <- read_csv("airports-extended.csv", col_names = FALSE)
# NOTE(review): "Airpot_ID" looks like a typo for "Airport_ID"; the name is
# kept as-is because code elsewhere may refer to it.
names(airport) <- c(
  "Airpot_ID", "Airport_Name", "City", "Country", "IATA",
  "ICAO", "Latitude", "Longitude", "Altitude", "Timezone",
  "DST", "Tz", "Type", "Source"
)
# Keep only the rows whose Type is "airport".
airport <- airport %>%
  filter(Type == "airport")
# Airline table, with column names normalised by janitor::clean_names().
airline <- clean_names(read_csv("airlines.csv"))

# Route table, normalised the same way; the fifth column is then renamed
# to "destination_airport".
route <- clean_names(read_csv("routes.csv"))
names(route)[5] <- "destination_airport"

# Country-level table, read with its column names untouched.
countries <- read_csv("countries of the world.csv")
# Interactive preview of the first five airport rows.
DT::datatable(
  head(airport, 5),
  options = list(lengthMenu = c(5, 3, 1))
)
This dataset covers 7750 objects.
# Interactive preview of the first five airline rows.
DT::datatable(
  head(airline, 5),
  options = list(lengthMenu = c(5, 3, 1))
)
This dataset covers 6162 objects.
# Interactive preview of the first five route rows.
DT::datatable(
  head(route, 5),
  options = list(lengthMenu = c(5, 3, 1))
)
This dataset covers 135,326 objects.
# Layout for the globe view: orthographic projection with land and ocean
# drawn in light grey / steel blue.
geo <- list(
  scope = "world",
  projection = list(type = "orthographic"),
  showland = TRUE,
  # plotly only accepts 110 or 50 here; the previous value (100) is not an
  # allowed option, so the default resolution is now stated explicitly.
  resolution = 110,
  landcolor = toRGB("gray90"),
  countrycolor = toRGB("gray80"),
  oceancolor = toRGB("lightsteelblue2"),
  showocean = TRUE
)

# One marker per airport, positioned by longitude/latitude.
# The former `locationmode = "Greenwich"` argument was dropped: it is not a
# valid plotly location mode and markers placed by coordinates do not use it.
plot_geo() %>%
  add_markers(
    data = airport %>%
      filter(Type == "airport"),
    x = ~Longitude,
    y = ~Latitude,
    text = ~paste('Airport: ', Airport_Name),
    alpha = .5,
    # I() marks the colour as a literal value; a bare "red" is treated as
    # data and mapped through the default palette instead.
    color = I("red")
  ) %>%
  layout(
    title = "Global Airports",
    geo = geo,
    showlegend = FALSE
  )
# Report how many rows of type "airport" the airport table holds.
print(
  paste(
    "There are",
    nrow(filter(airport, Type == "airport")),
    "airports around the world."
  )
)
## [1] "There are 7750 airports around the world."
There are 7750 airports around the world, according to the dataset.
# Tag each route with an id taken from its row name, so the two endpoints
# of one route can be matched up again after reshaping.
route <- mutate(route, id = rownames(route))

# Reshape to long form: one row per (route, endpoint). Airport_type records
# which column the code came from; Airport holds the code itself.
route <- gather(
  route,
  'source_airport', 'destination_airport',
  key = "Airport_type",
  value = "Airport"
)

# Attach airport name, coordinates, country and city to every endpoint by
# matching the endpoint code against the airport IATA column.
gloabal.flight.route <- merge(
  x = route,
  y = select(airport, Airport_Name, IATA, Latitude, Longitude, Country, City),
  by.x = "Airport",
  by.y = "IATA"
)
# World polygon data with Antarctica removed.
world.map <- map_data("world") %>%
  filter(region != "Antarctica")

# Base map, then route endpoints as small red points, then one faint red
# line per route id connecting its endpoints.
ggplot() +
  geom_map(
    mapping = aes(x = long, y = lat, group = group, map_id = region),
    data = world.map,
    map = world.map,
    colour = "black",
    fill = "white"
  ) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude),
    data = gloabal.flight.route,
    colour = "red",
    alpha = .5,
    size = .1
  ) +
  geom_line(
    mapping = aes(x = Longitude, y = Latitude, group = id),
    data = gloabal.flight.route,
    colour = "red",
    alpha = 0.05
  ) +
  ggtitle("Global Airline Routes")
# Map of airports whose Altitude is at least 5000, coloured by altitude.
ggplot() +
  geom_map(
    mapping = aes(x = long, y = lat, group = group, map_id = region),
    data = world.map,
    map = world.map,
    colour = "grey",
    fill = "white"
  ) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = Altitude),
    data = filter(airport, Altitude >= 5000),
    size = .7
  ) +
  ggtitle("Airports located over 5,000 feet altitude") +
  # Restrict the latitude axis to -60..90.
  ylim(-60, 90) +
  # Place the legend inside the panel, towards the lower left.
  theme(legend.position = c(.1, .25))
# Report how many airports have an Altitude of at least 5000.
print(
  paste(
    nrow(filter(airport, Altitude >= 5000)),
    "airports are located over 5,000 feet altitude."
  )
)
## [1] "298 airports are located over 5,000 feet altitude."
There are 298 airports located at or above 5,000 feet around the world. They are mainly found in mountainous regions such as the Rockies, the Andes, and the Himalayas. Papua New Guinea also has a few airports above 5,000 feet.
# Back to one row per route: spread the long endpoint rows into separate
# source_airport / destination_airport columns and keep only those plus id.
connection.route <- route %>%
  spread(key = Airport_type, value = Airport) %>%
  select(destination_airport, source_airport, id)

# Lookup table from IATA code to city and country.
airport.country <- airport %>%
  select(City, Country, IATA)

# Attach city/country for the source airport. The lookup columns are renamed
# by name before merging, rather than by position afterwards, so the result
# does not depend on the column order produced by merge().
flight.connection <- merge(
  connection.route,
  airport.country %>%
    rename(source.City = City, source.Country = Country),
  by.x = "source_airport", by.y = "IATA"
)

# Same for the destination airport.
flight.connection <- merge(
  flight.connection,
  airport.country %>%
    rename(destination.City = City, destination.Country = Country),
  by.x = "destination_airport", by.y = "IATA"
)

# Final column order: id, all source columns, then all destination columns.
flight.connection <- flight.connection %>%
  select(id, contains("source"), contains("destination"))
# Bar chart of the 20 countries with the most rows in the airport table.
# table() counts airports per country; data.frame() turns the counts into
# columns Var1 (country) and Freq (count).
# `FALSE` is spelled out because `F` can be reassigned.
data.frame(table(airport$Country)) %>%
  arrange(desc(Freq)) %>%
  head(20) %>%
  # reorder() sorts the bars from highest to lowest count.
  ggplot(aes(x = reorder(Var1, -Freq), y = Freq, fill = Var1, label = Freq)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  labs(title = "Top 20 Countries that has most Airports",
       x = "Country", y = "The number of Airports") +
  geom_label(angle = 45, show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 40, size = 15))
The United States has by far the most airports. This is probably because the United States has many military bases around the world. Nations with large territories, such as Russia and Canada, also have many airports because they need them to reach remote cities. However, smaller countries such as Japan also rank in the top 20. We can say that the number of airports is affected both by how large a country is and by how strong its economy is.
# Treemap with one tile per country, sized by its number of airport rows.
treemap(
  dtf = as.data.frame(table(airport$Country)),
  index = "Var1",
  vSize = "Freq",
  type = "index",
  title = "Overall Number of Airport owned by each Nation"
)
# Bar chart of the 20 countries with the most rows in the airline table.
# `FALSE` is spelled out because `F` can be reassigned.
data.frame(table(airline$country)) %>%
  arrange(desc(Freq)) %>%
  head(20) %>%
  # reorder() sorts the bars from highest to lowest count.
  ggplot(aes(x = reorder(Var1, -Freq), y = Freq,
             fill = Var1, label = Freq)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_label(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 40, size = 15)) +
  labs(x = "Country", y = "The number of Airlines",
       title = "Top 20 Countries that have most airlines")
# Per-country airport counts (columns Var1 and Airport).
country.airport <- data.frame(table(airport$Country))
names(country.airport)[2] <- "Airport"

# Per-country airline counts (columns Var1 and Airline).
country.airline <- data.frame(table(airline$country))
names(country.airline)[2] <- "Airline"

# Keep only countries present in both tables (merge() defaults to an inner
# join on Var1).
lineports <- merge(country.airport, country.airline, by = "Var1")

# Scatter plot of airlines against airports per country, with a smoothed
# trend line and a log10-scaled x axis.
# `FALSE` is spelled out because `F` can be reassigned.
lineports %>%
  ggplot(aes(x = Airport, y = Airline)) +
  geom_point(show.legend = FALSE) +
  geom_smooth() +
  labs(title = "Airports vs Airlines") +
  scale_x_continuous(trans = 'log10',
                     breaks = c(10, 100, 500, 1000))
There is a positive correlation between the number of airports and the number of airlines in a country.